package org.luosl.webmagicx.handler

import org.luosl.webmagicx.utils.RegexUtils
import us.codecraft.webmagic.{ResultItems, Task}
import org.luosl.webmagicx.conf.MatcherConverters._
import org.luosl.webmagicx.conf.PropType._
import org.luosl.webmagicx.conf.{ConfException, SpiderConf, XmlProps}


/**
  * Handler that runs a configured list of text operations against values stored in a
  * page's [[ResultItems]].
  *
  * Every entry returned by `props.props("*")` is turned into one [[TextHandlerCell]]; the entry's
  * tag selects the operation (`replaceAll`, `replace`, `replaceFirst`, `split` or `find`).
  * Cells are applied in configuration order. Each cell reads the value stored under `src` and
  * writes its result under `target`.
  *
  * @param sc    spider configuration, forwarded to [[AbstractHandler]]
  * @param task  WebMagic task, forwarded to [[AbstractHandler]]
  * @param props configuration of this handler; its child entries describe the operations
  */
class TextHandler(sc:SpiderConf, task:Task, props:XmlProps)  extends AbstractHandler(sc, task, props) {

  /** Raw configuration entries, one per text operation. */
  private val needProcessCell:List[XmlProps] = props.props("*")

  /** Parsed operations. `src`, `target` and `expression` are read as required values; `must` defaults to true. */
  private val textHandlerCells:List[TextHandlerCell] = needProcessCell.map{ prop=>
    TextHandlerCell(
      prop.tag,
      prop.value("src")(strType),
      prop.value("target")(strType),
      prop.value("expression")(strType),
      prop.valueOption("takeIndex")(intType),
      prop.valueOption("takeContains")(strType),
      prop.valueOption("replaceAs")(strType),
      prop.valueOrDefault("must")(booleanType)(true)
    )
  }

  /**
    * Applies every configured cell to `items`.
    *
    * If a cell extracts nothing and is flagged `must`, the page is marked as skipped and the
    * remaining cells are not executed. The target key is written in every executed cell; it
    * receives `null` when nothing was extracted.
    *
    * @param items result items of the page being processed; read and modified in place
    * @param task  the task the page belongs to (not used by this handler)
    * @throws ConfException if a cell uses an unsupported tag, or a `split`/`find` cell has
    *                       neither `takeIndex` nor `takeContains`
    */
  override def processHandle(items: ResultItems, task:Task): Unit = {
    textHandlerCells.foreach{ cell=>
      // Checked per cell: an earlier cell may have flagged the page as skipped.
      if(!items.isSkip){
        // Option(...) turns a missing (null) source value into None.
        val textOpt:Option[String] = Option(items.get(cell.src))
        val processed:Option[String] = cell.name match {
          case "replaceAll" => replaceAll(textOpt, cell.expression, cell.replaceAsOpt.getOrElse(""))
          case "replace" => replace(textOpt, cell.expression, cell.replaceAsOpt.getOrElse(""))
          case "replaceFirst" => replaceFirst(textOpt, cell.expression, cell.replaceAsOpt.getOrElse(""))
          case "split" => split(textOpt, cell.expression, cell.takeIndexOpt, cell.takeContainsOpt)
          case "find" => find(textOpt, cell.expression, cell.takeIndexOpt, cell.takeContainsOpt)
          case unsupported:String =>
            throw ConfException(s"TextHandler 暂不支持:$unsupported ")
        }
        val result:String = processed match {
          case Some(str) => str
          case None =>
            if(cell.must) {
              val opDesc:String = s"[name=${cell.name},src=${cell.src},target=${cell.target},expression=${cell.expression}]"
              logInfo(s"TextHandler 执行操作: $opDesc ，未抽取到数据，页面[_url=${items.get("_url")}]将会被跳过其他阶段...")
              items.setSkip(true)
            }
            // The target is still written, with null, when nothing was extracted.
            null
        }
        items.put(cell.target, result)
      }
    }
  }

  /**
    * Replaces every match of a regular expression.
    * @param textOpt text to process; `None` yields `None`
    * @param expression regular expression to search for
    * @param replaceAs replacement, interpreted by `String.replaceAll`
    * @return the text with all matches replaced
    */
  def replaceAll(textOpt:Option[String],expression:String,replaceAs:String): Option[String] ={
    textOpt.map(_.replaceAll(expression,replaceAs))
  }

  /**
    * Replaces every occurrence of a literal string (no regular-expression semantics).
    * @param textOpt text to process; `None` yields `None`
    * @param expression literal text to search for
    * @param replaceAs literal replacement
    * @return the text with all occurrences replaced
    */
  def replace(textOpt:Option[String],expression:String,replaceAs:String): Option[String] ={
    textOpt.map(_.replace(expression,replaceAs))
  }

  /**
    * Replaces the first match of a regular expression.
    * @param textOpt text to process; `None` yields `None`
    * @param expression regular expression to search for
    * @param replaceAs replacement, interpreted by `String.replaceFirst`
    * @return the text with the first match replaced
    */
  def replaceFirst(textOpt:Option[String],expression:String,replaceAs:String): Option[String] ={
    textOpt.map(_.replaceFirst(expression,replaceAs))
  }

  /**
    * Splits the text around matches of a regular expression and selects one of the parts.
    * @param textOpt text to process; `None` yields `None`
    * @param expression regular expression used as the delimiter
    * @param takeIndexOpt zero-based index of the part to take
    * @param takeContainsOpt substring the selected part must contain
    * @return the selected part, or `None` if no part satisfies the selection
    * @throws ConfException if both `takeIndexOpt` and `takeContainsOpt` are empty
    */
  def split(textOpt:Option[String],expression:String, takeIndexOpt:Option[Int],takeContainsOpt:Option[String]):Option[String] = {
    textOpt.flatMap{ txt=>
      val splitArr:Array[String] = txt.split(expression)
      indexOrContains(splitArr, takeIndexOpt, takeContainsOpt)
    }
  }

  /**
    * Collects the results of `RegexUtils.find` for the text and selects one of them.
    * @param textOpt text to process; `None` yields `None`
    * @param expression expression passed to `RegexUtils.find`
    * @param takeIndexOpt zero-based index of the result to take
    * @param takeContainsOpt substring the selected result must contain
    * @return the selected result, or `None` if no result satisfies the selection
    * @throws ConfException if both `takeIndexOpt` and `takeContainsOpt` are empty
    */
  def find(textOpt:Option[String],expression:String,takeIndexOpt:Option[Int],takeContainsOpt:Option[String]):Option[String] = {
    textOpt.flatMap{ txt=>
      val findArr:Array[String] = RegexUtils.find(txt, expression).toArray
      indexOrContains(findArr, takeIndexOpt, takeContainsOpt)
    }
  }

  /**
    * Selects one element of `arr` by index and/or by contained substring.
    *
    *  - index and substring: the element at the index, if it contains the substring
    *  - index only: the element at the index
    *  - substring only: the first element containing the substring
    *
    * An index outside `0 until arr.length` (including a negative one) and a `null` element
    * both yield `None` rather than an exception.
    *
    * @param arr candidate values
    * @param takeIndexOpt zero-based index of the element to take
    * @param takeContainsOpt substring the selected element must contain
    * @return the selected element, or `None` if nothing matches
    * @throws ConfException if neither selector is configured
    */
  private def indexOrContains(arr:Array[String],takeIndexOpt:Option[Int],takeContainsOpt:Option[String]):Option[String] = {
    // Bounds-checked, null-safe element access.
    def elementAt(index:Int):Option[String] =
      if(index >= 0 && index < arr.length) Option(arr(index)) else None

    (takeIndexOpt, takeContainsOpt) match {
      case (Some(index), Some(str)) => elementAt(index).filter(_.contains(str))
      case (Some(index), None) => elementAt(index)
      case (None, Some(str)) => arr.find(elem => elem != null && elem.contains(str))
      case (None, None) =>
        // Reached from both `split` and `find`, so the message names both tags.
        throw ConfException("split/find 标签必须配置 takeIndex 或 takeContains 属性")
    }
  }

}

/**
  * Configuration of a single text operation.
  *
  * @param name            operation name (the tag of the configuration entry)
  * @param src             key of the value to read
  * @param target          key the result is written to
  * @param expression      expression the operation works with
  * @param takeIndexOpt    optional index of the element to take
  * @param takeContainsOpt optional substring the taken element must contain
  * @param replaceAsOpt    optional replacement text
  * @param must            whether the operation is required to produce a value
  */
case class TextHandlerCell(
  name:String,
  src:String,
  target:String,
  expression:String,
  takeIndexOpt:Option[Int],
  takeContainsOpt:Option[String],
  replaceAsOpt:Option[String],
  must:Boolean
)
